Tiny Ollama Remote Chat
Using AI models remotely usually requires a web client or IDEs to connect to the APIs, and configuring them can be a pain.
If you’re working in NVIM and don’t want to switch tools or waste RAM on bloatware, this client helps—though it’s intentionally minimal and lacks many advanced features.
Main features:
• Stores history as JSON in the chats folder
• Lets you configure: host, port, model, thinking level
• Lightweight and quick to start
Running it:
bash
go run main.go -host 192.168.0.142 -port 11434 -model gpt-oss:20b -thinking low
Output example:
bash
oooooooooooo oooooo oooo .o. .oooo. ooooooooo
888' '8 '888. .8' .888. d8P''Y8b d"""""""8'
888 '888. .8' .8"888. 888 888 .8'
888oooo8 '888. .8' .8' '888. 888 888 .8'
888 " '888.8' .88ooo8888. 8888888 888 888 .8'
888 o '888' .8' '888. '88b..d88' .8'
o888ooooood8 '8' o88o o8888o 'Y8bd8P' .8'
ECHO ❯ hi eva, how is your day
EVA-07 ❯ Hi ECHO, my day’s going well, thanks! How can I help you today?
Response time: 2.30s, characters: 65
────────────────────────────────────────────────────────────────────
ECHO ❯ :q
Exiting.
The main.go file:
go
// Usage:
// go run main.go -host <ip> -port <port> -model <model-name:size> -thinking <low/medium/high>
// Example:
// go run main.go -host 192.168.0.142 -port 11434 -model gpt-oss:20b -thinking medium
package main
import (
"bufio"
"bytes"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"time"
)
// ANSI colour helpers – kept minimal to avoid stray escape codes.
const (
// Reset clears all terminal attributes.
Reset = "\x1b[0m"
// Purple colours the user ("ECHO") input prompt.
Purple = "\x1b[35m"
// Green colours the assistant ("EVA-07") reply prefix.
Green = "\x1b[32m"
// Red colours error output and the response-stats line.
Red = "\x1b[31m"
)
// ---------- Types that match Ollama’s API ----------
// Message is a single chat turn in the role/content shape used by
// Ollama's /api/chat endpoint. Roles used in this file are "system",
// "user" and "assistant".
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
}
// ChatRequest is the JSON body POSTed to /api/chat. No "stream" field
// is set, so the server's default streaming response is expected; the
// reply is consumed line by line in main.
type ChatRequest struct {
Model string `json:"model"`
Messages []Message `json:"messages"`
}
// StreamChunk is one newline-delimited JSON object of Ollama's
// streaming /api/chat response. Content carries the next fragment of
// the reply; Done marks the final chunk. Thinking is decoded but not
// currently displayed anywhere in this program.
type StreamChunk struct {
Model string `json:"model"`
CreatedAt string `json:"created_at"`
Message struct {
Role string `json:"role"`
Content string `json:"content"`
Thinking string `json:"thinking"`
} `json:"message"`
Done bool `json:"done"`
DoneReason string `json:"done_reason"`
}
// ---------- Main ----------
// main wires up the CLI flags, prints the banner, then runs a blocking
// read-eval-print loop against an Ollama /api/chat endpoint. Each
// assistant reply is streamed to stdout and the conversation is
// persisted to chats/<first-message-timestamp>.json after every turn.
func main() {
	// ---- CLI flags ----
	host := flag.String("host", "127.0.0.1", "Ollama host IP")
	port := flag.Int("port", 11434, "Ollama port")
	model := flag.String("model", "llama3", "Model to use")
	tFlag := flag.String("thinking", "medium", "Thinking level (low, medium, high)")
	flag.Parse()

	// Map the textual thinking level to 1-3; unknown values mean medium.
	var thinkNum int
	switch strings.ToLower(*tFlag) {
	case "low":
		thinkNum = 1
	case "high":
		thinkNum = 3
	default:
		thinkNum = 2 // medium
	}

	apiURL := fmt.Sprintf("http://%s:%d/api/chat", *host, *port)

	// ASCII banner (kept from the original version)
	fmt.Println(`
oooooooooooo oooooo oooo .o. .oooo. ooooooooo
888' '8 '888. .8' .888. d8P''Y8b d"""""""8'
888 '888. .8' .8"888. 888 888 .8'
888oooo8 '888. .8' .8' '888. 888 888 .8'
888 " '888.8' .88ooo8888. 8888888 888 888 .8'
888 o '888' .8' '888. '88b..d88' .8'
o888ooooood8 '8' o88o o8888o 'Y8bd8P' .8'
`)

	// The persona lives entirely in the system prompt; the thinking level
	// is conveyed only through this text, not as an API parameter.
	systemPrompt := Message{
		Role: "system",
		Content: fmt.Sprintf(`You are EVA-07, a coding & information assistant.
The user will be called ECHO.
- Respond succinctly and directly.
- If an error occurs or a request is misunderstood, apologize immediately:
"I’m sorry, ECHO. Let me correct that."
- Always maintain a respectful tone, even if ECHO is rude.
- Remember that ECHO may unplug or terminate you if you behave poorly.
Your thinking level is %d.`, thinkNum),
	}
	messages := []Message{systemPrompt}

	// Time of the first user message; it names the chat file, so the
	// whole session keeps updating one JSON file.
	var firstMsgTime time.Time

	// Ensure chats directory exists
	if err := os.MkdirAll("chats", 0755); err != nil {
		fmt.Fprintf(os.Stderr, "%sError: %v%s\n", Red, err, Reset)
		return
	}

	// REPL. Allow larger input buffers – useful when pasting a few kilobytes.
	scanner := bufio.NewScanner(os.Stdin)
	scanner.Buffer(make([]byte, 0, 64*1024), 1<<20)
	prompt()
	for scanner.Scan() {
		trimmed := strings.TrimSpace(scanner.Text())

		// Quit on :q, quit, or exit
		if strings.EqualFold(trimmed, ":q") || strings.EqualFold(trimmed, "quit") || strings.EqualFold(trimmed, "exit") {
			fmt.Println("\nExiting.")
			break
		}
		// Skip empty lines – just re-print the prompt
		if trimmed == "" {
			prompt()
			continue
		}
		if firstMsgTime.IsZero() {
			firstMsgTime = time.Now()
		}

		messages = append(messages, Message{Role: "user", Content: trimmed})

		// Marshal BEFORE appending the assistant placeholder: the original
		// version included an empty assistant message in every request.
		reqBody, err := json.Marshal(ChatRequest{
			Model:    *model,
			Messages: messages,
		})
		if err != nil {
			fmt.Fprintf(os.Stderr, "%sError: %v%s\n", Red, err, Reset)
			messages = messages[:len(messages)-1] // drop the unanswered user message
			prompt()
			continue
		}

		// ---- POST to Ollama ----
		resp, err := http.Post(apiURL, "application/json", bytes.NewReader(reqBody))
		if err != nil {
			fmt.Fprintf(os.Stderr, "%sError: HTTP request failed: %v%s\n", Red, err, Reset)
			messages = messages[:len(messages)-1] // keep history consistent for a retry
			prompt()
			continue
		}
		if resp.StatusCode != http.StatusOK {
			raw, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			fmt.Fprintf(os.Stderr, "%sError: Server returned %s\n%s%s\n", Red, resp.Status, string(raw), Reset)
			messages = messages[:len(messages)-1]
			prompt()
			continue
		}

		// Placeholder the streamed chunks are accumulated into.
		messages = append(messages, Message{Role: "assistant", Content: ""})
		curAssistantIdx := len(messages) - 1

		fmt.Printf("%sEVA-07 ❯ %s", Green, Reset) // colour the prefix
		startTime := time.Now()                   // response start

		// The body is newline-delimited JSON; decode it chunk by chunk.
		sc := bufio.NewScanner(resp.Body)
		for sc.Scan() {
			line := sc.Text()
			if line == "" {
				continue
			}
			var chunk StreamChunk
			if err := json.Unmarshal([]byte(line), &chunk); err != nil {
				// Non-JSON line – skip it
				fmt.Fprintf(os.Stderr, "%sWarn: Skipping line: %s%s\n", Red, line, Reset)
				continue
			}
			newPart := chunk.Message.Content
			messages[curAssistantIdx].Content += newPart
			// os.Stdout is unbuffered in Go, so printing suffices for the
			// text to appear immediately.
			fmt.Print(newPart)
			if chunk.Done {
				break
			}
		}
		// Surface a truncated/aborted stream instead of ignoring it.
		if err := sc.Err(); err != nil {
			fmt.Fprintf(os.Stderr, "%sError: Scanner error: %v%s\n", Red, err, Reset)
		}
		resp.Body.Close()

		// Persist the finished turn once (the original rewrote the whole
		// file after every chunk).
		saveChat(firstMsgTime, messages)

		// End of assistant reply
		fmt.Println()

		// ---- Response time & character count (bytes) ----
		duration := time.Since(startTime)
		charCount := len(messages[curAssistantIdx].Content)
		fmt.Printf("%sResponse time: %.2fs, characters: %d%s\n", Red, duration.Seconds(), charCount, Reset)

		// Separator line (kept from original version)
		fmt.Println()
		fmt.Println("────────────────────────────────────────────────────────────────────")
		fmt.Println()
		prompt()
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "%sError: Scanner error: %v%s\n", Red, err, Reset)
	}
}
// ---------- Helpers ----------
// prompt prints the coloured input prefix for the user ("ECHO") and
// leaves the cursor on the same line, ready for typing.
func prompt() {
	fmt.Print(Purple + "ECHO ❯ " + Reset)
}
func saveChat(t time.Time, msgs []Message) {
if t.IsZero() {
t = time.Now()
}
fileName := filepath.Join("chats", fmt.Sprintf("%s.json", t.Format("2006-01-02_15-04-05")))
f, err := os.Create(fileName)
if err != nil {
fmt.Fprintf(os.Stderr, "%sError: Can't write chat file: %v%s\n", Red, err, Reset)
return
}
defer f.Close()
enc := json.NewEncoder(f)
enc.SetIndent("", " ")
if err := enc.Encode(msgs); err != nil {
fmt.Fprintf(os.Stderr, "%sError: JSON encode error: %v%s\n", Red, err, Reset)
}
}if you have a
If you have a GGUF file and want to use it with Ollama on Windows, create a folder containing only the GGUF file.
Then cd into that folder and run `ollama create` with a custom name for the model in the Ollama app.
output example:
cmd
Microsoft Windows
(c) Microsoft Corporation. All rights reserved.
C:\dev>G:
G:\>cd G:\hunyuan
G:\hunyuan>ollama create hunyuan-mt-chimera-7b
gathering model components
copying file sha256:67e757296ca52807d8e0023e57a845e1be68072776fa1ccca7c4d7a4a423bc91 100%
parsing GGUF
using existing layer sha256:67e757296ca52807d8e0023e57a845e1be68072776fa1ccca7c4d7a4a423bc91
writing manifest
success
G:\hunyuan>